# In this script, I fit simplified models to density data so that I can predict densities on the condition data and pred grid
# Clear the workspace.
# NOTE(review): rm(list = ls()) only removes objects; it does not detach
# packages or reset options — restarting R gives a truly clean session.
rm(list = ls())

# Data wrangling & plotting
library(tidyverse); theme_set(theme_light(base_size = 12))
library(readxl)
library(tidylog)
library(RCurl)
library(viridis)
library(RColorBrewer)
library(patchwork) # FIX: was loaded twice; duplicate call removed
library(janitor)

# Survey data & spatial tools
library(icesDatras)
library(mapdata)
library(rgdal) # NOTE(review): rgdal was retired from CRAN (2023); consider migrating to sf/terra
library(raster)
library(sf)
library(rgeos) # NOTE(review): rgeos was retired from CRAN (2023); consider migrating to sf
library(chron)
library(lattice)
library(ncdf4)
library(marmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(mapplots)
library(geosphere)

# Model fitting
#remotes::install_github("pbs-assess/sdmTMB")
library(sdmTMB)

# To load entire cache in interactive r session, do:
# qwraps2::lazyload_cache_dir(path = "R/clean_data/cod_fle_density_models_as_covars_cache/html")
# World polygons in sf format (kept for possible use further down the script)
world <- ne_countries(scale = "medium", returnclass = "sf")

# Map extent in degrees for the Baltic Sea study area
xmin <- 12
xmax <- 22
ymin <- 54
ymax <- 58

# European coastline polygons
map_data <- rnaturalearth::ne_countries(
  scale = "medium",
  continent = "europe",
  returnclass = "sf"
)

# Crop the polygon to the study area, for plotting and efficiency
# (st_bbox(map_data) gives the rough coordinates)
swe_coast <- suppressWarnings(suppressMessages(
  st_crop(map_data, c(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax))
))

# Project the map into UTM zone 33 (EPSG:32633), the planar coordinate
# system the models are fitted in.
# NOTE(review): the original comment called UTM "equal-area"; UTM is a
# conformal projection, but it is near-equidistant over an area this size,
# which is what sdmTMB needs.
utm_zone33 <- 32633
swe_coast_proj <- sf::st_transform(swe_coast, crs = utm_zone33)
#ggplot(swe_coast_proj) + geom_sf()
# Read the CPUE (density) data used to fit the models
d <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/mdat_cpue.csv")

# Filter to match the data I want to predict on (1993-2019)
d <- d %>% filter(year > 1992 & year < 2020)

# Standardize depth, coerce year to integer, drop rows with missing depth,
# and rename the cod density column to fit better with how flounder is named
d <- d %>%
  mutate(
    depth_sc = (depth - mean(depth)) / sd(depth),
    year = as.integer(year)
  ) %>%
  drop_na(depth) %>%
  rename(density_cod = density)
# Explore data a bit
ggplot(d) +
geom_point(aes(year, density_cod, color = factor(sub_div))) +
facet_wrap(~sub_div)
# Explore data a bit
ggplot(d) +
geom_point(aes(year, density_fle, color = factor(sub_div))) +
facet_wrap(~sub_div)
# Make mesh:
spde <- make_mesh(d, xy_cols = c("X", "Y"),
n_knots = 150,
type = "kmeans", seed = 42)
plot(spde)
# Cod density model ---------------------------------------------------------
# Fixed effects: year as a factor (0 + ... = one mean per year, no global
# intercept) plus a penalized smoother on scaled depth. Tweedie family with
# log link, spatial random field, AR1 spatiotemporal fields by year, fitted
# by ML (reml = FALSE) with one extra Newton step for convergence.
# NOTE(review): the original header said "Depth spline + oxy spline", but the
# formula below contains only a depth spline — confirm whether oxygen was
# meant to be included.
mcod <- sdmTMB(density_cod ~ 0 + as.factor(year) + s(depth_sc),
data = d, mesh = spde, family = tweedie(link = "log"),
spatiotemporal = "AR1", spatial = "on", time = "year",
reml = FALSE, control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k` since the degree of wiggliness
#> is determined by the data.
# Fixed-effect estimates with 95% CIs (link scale); output from the original
# run is pasted below:
tidy(mcod, conf.int = TRUE)
#> term estimate std.error conf.low conf.high
#> 1 as.factor(year)1993 4.454140 0.4253550 3.620459 5.287820
#> 2 as.factor(year)1994 4.793433 0.4023658 4.004810 5.582055
#> 3 as.factor(year)1995 5.079760 0.3998733 4.296023 5.863497
#> 4 as.factor(year)1996 5.145407 0.4004867 4.360468 5.930346
#> 5 as.factor(year)1997 4.010807 0.3998520 3.227111 4.794502
#> 6 as.factor(year)1998 4.280269 0.3978222 3.500552 5.059987
#> 7 as.factor(year)1999 4.290515 0.3970853 3.512242 5.068788
#> 8 as.factor(year)2000 4.576831 0.3971113 3.798507 5.355154
#> 9 as.factor(year)2001 4.957168 0.3842108 4.204129 5.710208
#> 10 as.factor(year)2002 5.363282 0.3818835 4.614804 6.111760
#> 11 as.factor(year)2003 4.553489 0.4017928 3.765989 5.340988
#> 12 as.factor(year)2004 4.210470 0.3941435 3.437963 4.982977
#> 13 as.factor(year)2005 5.057329 0.3753104 4.321734 5.792923
#> 14 as.factor(year)2006 4.985293 0.3653520 4.269216 5.701369
#> 15 as.factor(year)2007 4.976487 0.3645525 4.261977 5.690997
#> 16 as.factor(year)2008 5.261581 0.3629398 4.550232 5.972930
#> 17 as.factor(year)2009 5.359733 0.3621358 4.649960 6.069507
#> 18 as.factor(year)2010 5.252602 0.3642944 4.538598 5.966606
#> 19 as.factor(year)2011 4.619708 0.3652426 3.903845 5.335570
#> 20 as.factor(year)2012 4.397305 0.3688655 3.674342 5.120268
#> 21 as.factor(year)2013 4.741467 0.3655587 4.024985 5.457949
#> 22 as.factor(year)2014 4.705363 0.3682657 3.983576 5.427150
#> 23 as.factor(year)2015 4.601450 0.3681050 3.879977 5.322922
#> 24 as.factor(year)2016 4.340151 0.3685396 3.617826 5.062475
#> 25 as.factor(year)2017 4.527776 0.3662835 3.809874 5.245679
#> 26 as.factor(year)2018 4.167567 0.3677799 3.446731 4.888402
#> 27 as.factor(year)2019 4.164741 0.3994959 3.381744 4.947739
# Check residuals of the cod model: QQ-plot of residuals against the 1:1
# line (points on the line indicate a good fit to the reference distribution)
d$residualsmcod <- residuals(mcod)
qqnorm(d$residualsmcod); abline(a = 0, b = 1)
# Save model (so that I can add density predictions to the pred grid for condition)
saveRDS(mcod, "output/mcod.rds")
# Fit flounder model
mfle <- sdmTMB(density_fle ~ 0 + as.factor(year) + s(depth_sc),
data = d, mesh = spde, family = tweedie(link = "log"),
spatiotemporal = "AR1", spatial = "on", time = "year",
reml = FALSE, control = sdmTMBcontrol(newton_steps = 1))
#> Warning: Detected a `s()` smoother. Smoothers are penalized in sdmTMB as
#> of version 0.0.19, but used to be unpenalized.
#> You no longer need to specify `k` since the degree of wiggliness
#> is determined by the data.
tidy(mfle, conf.int = TRUE)
#> term estimate std.error conf.low conf.high
#> 1 as.factor(year)1993 3.949785 0.3918821 3.181711 4.717860
#> 2 as.factor(year)1994 3.907088 0.3762462 3.169659 4.644517
#> 3 as.factor(year)1995 4.226126 0.3737753 3.493540 4.958712
#> 4 as.factor(year)1996 4.096355 0.3747133 3.361930 4.830779
#> 5 as.factor(year)1997 3.595180 0.3727111 2.864680 4.325680
#> 6 as.factor(year)1998 3.563202 0.3695285 2.838940 4.287465
#> 7 as.factor(year)1999 3.770240 0.3664361 3.052038 4.488442
#> 8 as.factor(year)2000 3.969409 0.3689576 3.246265 4.692552
#> 9 as.factor(year)2001 4.396938 0.3568249 3.697574 5.096302
#> 10 as.factor(year)2002 4.783574 0.3526219 4.092447 5.474700
#> 11 as.factor(year)2003 4.040726 0.3649964 3.325346 4.756106
#> 12 as.factor(year)2004 4.060109 0.3581775 3.358094 4.762124
#> 13 as.factor(year)2005 4.169854 0.3476480 3.488477 4.851232
#> 14 as.factor(year)2006 4.403522 0.3390383 3.739019 5.068025
#> 15 as.factor(year)2007 4.615532 0.3368432 3.955331 5.275732
#> 16 as.factor(year)2008 4.793969 0.3366126 4.134221 5.453718
#> 17 as.factor(year)2009 4.660473 0.3365410 4.000865 5.320082
#> 18 as.factor(year)2010 4.808711 0.3362852 4.149604 5.467818
#> 19 as.factor(year)2011 4.436690 0.3362290 3.777693 5.095687
#> 20 as.factor(year)2012 4.462359 0.3384255 3.799057 5.125660
#> 21 as.factor(year)2013 4.585104 0.3371744 3.924254 5.245953
#> 22 as.factor(year)2014 4.249861 0.3403799 3.582729 4.916994
#> 23 as.factor(year)2015 4.227479 0.3399941 3.561103 4.893855
#> 24 as.factor(year)2016 4.232719 0.3394271 3.567454 4.897984
#> 25 as.factor(year)2017 4.382421 0.3384405 3.719090 5.045752
#> 26 as.factor(year)2018 4.267328 0.3406859 3.599596 4.935060
#> 27 as.factor(year)2019 4.449725 0.3635740 3.737133 5.162317
# Check residuals of models
d$residualsmfle <- residuals(mfle)
qqnorm(d$residualsmfle); abline(a = 0, b = 1)
# Save model (so that I can add density predictions to the pred grid for condition)
saveRDS(mfle, "output/mfle.rds")
# Read in pred_grid2 which has oxygen values at location and time and depth:
pred_grid2 <- readr::read_csv("https://raw.githubusercontent.com/maxlindmark/cod_condition/master/data/for_analysis/pred_grid2.csv")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   X = col_double(),
#>   Y = col_double(),
#>   depth = col_double(),
#>   year = col_double(),
#>   deep = col_character(),
#>   oxy = col_double(),
#>   temp = col_double(),
#>   lon = col_double(),
#>   lat = col_double(),
#>   biomass_saduria = col_double(),
#>   subdiv = col_character(),
#>   subdiv2 = col_character(),
#>   sub_div = col_double(),
#>   ices_rect = col_character()
#> )
# Standardize the prediction grid with respect to the *fitting* data (d),
# not the grid itself, so the scaled covariates are on the scale the models
# were estimated on.
# BUG FIX: the tidylog output below (from the original run) shows temp_sc had
# "one unique value and 100% NA" — d$temp contains NAs, so mean(d$temp) and
# sd(d$temp) without na.rm = TRUE returned NA and poisoned the whole column.
# na.rm = TRUE added to all mean/sd calls (no-op for columns without NAs).
pred_grid2 <- pred_grid2 %>%
  mutate(year = as.integer(year)) %>%
  filter(year %in% unique(d$year)) %>%
  mutate(
    depth_sc = (depth - mean(d$depth, na.rm = TRUE)) / sd(d$depth, na.rm = TRUE),
    temp_sc = (temp - mean(d$temp, na.rm = TRUE)) / sd(d$temp, na.rm = TRUE),
    oxy_sc = (oxy - mean(d$oxy, na.rm = TRUE)) / sd(d$oxy, na.rm = TRUE)
  ) %>%
  drop_na(oxy, depth, temp)
# tidylog output from the ORIGINAL (pre-fix) run, kept for reference:
#> mutate: converted 'year' from double to integer (0 new NA)
#> filter: no rows removed
#> mutate: new variable 'depth_sc' (double) with 6,429 unique values and 0% NA
#> new variable 'temp_sc' (double) with one unique value and 100% NA
#> new variable 'oxy_sc' (double) with 250,804 unique values and 2% NA
#> drop_na: removed 6,615 rows (3%), 249,453 rows remaining
# Predict on the pred grid, calculate biomass index
# (area = 2*2: each grid cell is 2 x 2 km, so the index sums to kg over the area)
preds_fle <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 2*2)
index <- get_index(preds_fle, bias_correct = FALSE)
index <- index %>% mutate(est_t = est/1000, lwr_t = lwr/1000, upr_t = upr/1000) # kg -> tonnes
#> mutate: new variable 'est_t' (double) with 27 unique values and 0% NA
#> new variable 'lwr_t' (double) with 27 unique values and 0% NA
#> new variable 'upr_t' (double) with 27 unique values and 0% NA
# Plot index
ggplot() +
  geom_line(data = index, aes(year, est_t, color = "blue")) +
  geom_ribbon(data = index, aes(year, ymin = lwr_t, ymax = upr_t, fill = "blue"), alpha = 0.4) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 20)) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(color = "none") + # FIX: guides(<scale> = FALSE) was deprecated in ggplot2 3.3.4
  labs(x = "Year", y = "Tonnes", fill = "Sub Divisions", label = "") +
  theme_light(base_size = 11) +
  theme(axis.text.x = element_text(angle = 30),
        legend.position = c(0.8, 0.8),
        legend.background = element_blank()) +
  NULL
# Calculate an index that corresponds to the average so that I can compare it to the data
ncells <- filter(pred_grid2, year == max(pred_grid2$year)) %>% nrow()
#> filter: removed 240,214 rows (96%), 9,239 rows remaining
# area = 1/ncells makes the summed index an average density per cell
preds_fle_ave <- predict(mfle, newdata = pred_grid2, return_tmb_object = TRUE, area = 1/ncells)
index_ave <- get_index(preds_fle_ave, bias_correct = FALSE)
# Mean observed flounder density per year, for comparison with the model-based index
d_sum <- d %>%
  ungroup() %>%
  group_by(year) %>%
  summarise(mean_density_fle = mean(density_fle))
#> ungroup: no grouping variables
#> group_by: one grouping variable (year)
#> summarise: now 27 rows and 2 columns, ungrouped
ggplot(index_ave, aes(year, est, color = "prediction")) +
  geom_line() +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = "prediction"), alpha = 0.1) +
  # FIX: size moved outside aes() — it is a fixed point size, not a mapped
  # aesthetic (inside aes() it created a spurious "size" legend entry)
  geom_point(data = d_sum, aes(year, mean_density_fle, color = "data"), size = 1.1) +
  geom_line(data = d_sum, aes(year, mean_density_fle, color = "data"), linetype = 2) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  guides(fill = "none") + # FIX: deprecated guides(<scale> = FALSE)
  labs(x = "Year", y = expression(paste("Density [kg/", km^2, "]", sep = ""))) +
  NULL
# Map the flounder predictions: response-scale density (exp of the
# link-scale estimate, fixed + random effects), one panel per year
ggplot(swe_coast_proj) +
  geom_raster(
    data = preds_fle$data,
    aes(x = X * 1000, y = Y * 1000, fill = exp(est)) # X/Y are in km; map is in m
  ) +
  geom_sf(size = 0.3) +
  scale_fill_viridis_c(trans = "sqrt") +
  facet_wrap(~ year, ncol = 5) +
  labs(x = "Longitude", y = "Latitude", fill = expression(kg/km^2)) +
  ggtitle("Predicted density (fixed + random)")
## Interesting! Next, predict from these models on dat (the condition data)
## so the predicted densities can be used as covariates there.